* THIS DO-FILE GENERATES THE PANEL OF INDIVIDUALS, THEIR INCOME AND DEMOGRAPHICS USING THE BEF, IDAP, AKM, UDDA AND IND REGISTRIES

clear
global raw "Path to rawdata"

***************************************************
**** RAW DATA
* Builds temp.dta by stacking one cross-section per year, 1995-2015.
* For each year the population list (pop_pnr) is merged 1:1 on pnr with the
* bef, idap, akm, udda and ind files of that year.
*   - bef:  keep(mat)     -> only persons found in bef for that year are kept.
*   - rest: keep(mat mas) -> persons stay even when absent from the using file.
* File paths are quoted so that a raw-data path containing spaces still works.
* The year-dependent branches test the loop counter `i' directly; a programming
* -if- on a dataset variable would only look at its first observation.
set more off
forvalues i = 1995/2015 {
	di `i'
	use "$raw/pop_pnr", clear
	gen year=`i'
	merge 1:1 pnr using "$raw/bef`i'", nogen norep keep(mat) keepus(civst efalle MOR_ID FAR_ID alder koen)

	if `i'<=2007 {
		* 1995-2007: only pstill and sstill are taken from idap
		merge 1:1 pnr using "$raw/idap`i'", nogen norep keep(mat mas) keepus(pstill sstill)
		local indvars "PERINDK* perindk* SAMLINK* PERSAMLINK* qsplind* erhvervsi* ERHVERVSI* netov* NETOV*"
	}
	else {
		if `i'<=2013 {
			* 2008-2013: pstill and sstill come from the _old idap file, merged first
			merge 1:1 pnr using "$raw/idap`i'_old", nogen norep keep(mat mas) keepus(pstill sstill)
			local indvars "PERINDK* perindk* SAMLINK* PERSAMLINK* qsplind* erh* ERH* net* NET*"
		}
		else {
			* 2014-2015: no _old idap file is used and no SAMLINK variables are requested
			local indvars "PERINDK* perindk* qsplind* erh* ERH* net* NET*"
		}
		* 2008 onwards: all variables of the idap file are merged in
		merge 1:1 pnr using "$raw/idap`i'", nogen norep keep(mat mas)
	}

	merge 1:1 pnr using "$raw/akm`i'", nogen norep keep(mat mas)
	merge 1:1 pnr using "$raw/udda`i'", nogen norep keep(mat mas) keepus(hfaudd)
	merge 1:1 pnr using "$raw/ind`i'", nogen norep keep(mat mas) keepus(`indvars')

	* The first year starts the stack; every later year is put on top of the
	* years already saved. force lets -append- proceed when a variable is
	* string in one year and numeric in another.
	if `i'>1995 {
		qui append using temp.dta, force
	}
	qui save temp.dta, replace
}
	
* EARNINGS AND INCOME (EARNINGS COME FROM DIFFERENT VARIABLES OVER TIME AND FROM DIFFERENT FIELDS IN THE TAX FORM)
* Start from ERHVERVSINDK_13, then let each alternative earnings variable take
* over when earn is missing or negative, or when the alternative is larger and
* both values are non-missing.
generate earn = ERHVERVSINDK_13
foreach src in erhvervsindk ERHVERVSINDK_GL erhvervsindk_13 {
	replace earn = `src' if earn==. | earn<0 | (earn<`src' & `src'!=. & earn!=.)
}
* Where earn is still missing or exactly zero, fall back on the net-surplus
* variables, tried in this order.
foreach src in NETOVSKUD_13 NETOVSKUD_GL netovskud netovskud_13 {
	replace earn = `src' if earn==. | earn==0
}
* Negative values are set to zero
replace earn = 0 if earn<0

* Income: PERINDKIALT_13 where available, otherwise perindkp
generate income = PERINDKIALT_13
replace income = perindkp if income==.

* Taxable income is copied from qsplindk
generate taxinc = qsplindk

* CONVERTING TO 2015 DANISH KRONER
* Each amount is multiplied by 100 / (index of its year). The two lists below
* are parallel: the k-th index value belongs to the k-th year. Rows from 2015
* are not rescaled. Negative amounts are set to zero before rescaling.
local cpi_years "2014 2013 2012 2011 2010 2009 2008 2007 2006 2005 2004 2003 2002 2001 2000 1999 1998 1997 1996 1995"
local cpi_index "99.6 99 98.2 95.9 93.3 91.2 90.1 87.1 85.6 84 82.5 81.6 79.9 78 76.2 74.1 72.3 71 69.5 68"
local n_years : word count `cpi_years'

foreach x of varlist earn income taxinc {
	replace `x'=0 if `x'<0
	forvalues k = 1/`n_years' {
		local yr : word `k' of `cpi_years'
		local ix : word `k' of `cpi_index'
		replace `x'=`x'*100/`ix' if year==`yr'
	}
}

* DEMOGRAPHICS
* Sex indicators from koen (1 -> male, 2 -> female); any other value,
* including missing, gives 0 on both indicators.
generate male = (koen==1)
generate female = (koen==2)

* alder is renamed to age; birth year is the observation year minus age
rename alder age
generate birthyear = year - age

* Convert the status and SSOC*/PSOC* variables to numeric
destring pstill sstill SSOC* PSOC*, replace

* Store the raw panel and remove the intermediate stacking file
compress
save panelraw_v7, replace
erase temp.dta

***************************************************
**** VARIABLES

use panelraw_v7, clear
sort pnr year

* EDUCATION DUMMIES
* hfaudd is mapped to audd_main_num through the conversion key. Each dummy is
* 1 when audd_main_num is at or above its cut-off, 0 when below, and missing
* when audd_main_num is missing.
merge m:1 hfaudd using key_to_convert_audd_to_main_groups, keep(mat mas) nogen
local edu_names "gymnasium college bachelor"
local edu_cuts  "20 50 60"
forvalues k = 1/3 {
	local nm : word `k' of `edu_names'
	local cut : word `k' of `edu_cuts'
	generate `nm' = audd_main_num>=`cut' if audd_main_num!=.
}
* The helper variables from the key are no longer needed
drop audd_*

* DISABILITY
* Indicator equal to 1 when any of the three status variables carries its
* listed code, 0 otherwise.
generate lms_disability = (pstill==93) | (PSOC_STATUS_KODE==411) | (SOCIO13==321)
destring DISCO*, replace
sort pnr year
compress

label variable year "Year"
label variable birthyear "Cohort"
label variable age "Age"
label variable lms_disability "Disability"
label variable earn "Earnings"

* Person panel without the DISCO* variables; the data in memory are restored
* afterwards so the DISCO* variables remain available for the next step.
preserve
	drop DISCO*
	save person-panel_v7, replace
restore

* SAVING ISCO-OCCUPATION CODES FOR LATER
* Only the identifiers and the DISCO* variables go into this file
keep pnr year DISCO*
save person-isco_v1, replace

***************************************************
**** DIAGNOSES, MEDICATION, PARENTAL WEALTH

* Run the three auxiliary do-files in this order: diagnoses, medication, parents
foreach part in diagnoses medication parents {
	do `part'_v6_final.do
}

********************************************************************************
**** COLLECTING DATA IN A PANEL

* Assembles fullsample_v7 from the person panel plus parent identifiers,
* diagnoses and medication.
set more off
use person-panel_v7, clear

* Cohort indicator: 1 when birthyear is above 1956
gen post=birthyear>1956

* The year-specific parent identifiers are replaced by the ones stored per
* person in parents_id. keep(mat mas) keeps the panel rows only: without it,
* ids that exist only in parents_id would be added as extra rows with no
* year or age. This also matches the other merges in this file.
drop MOR_ID FAR_ID
merge m:1 pnr using parents_id, keep(mat mas) nogen keepus(MOR_ID FAR_ID)

* NOTE(review): diagnoses are matched on pnr and age, medication on pnr and
* year; 1:1 requires pnr-age to be unique in both files -- confirm that this
* key is intended.
merge 1:1 pnr age using diagnoses_v6, keep(mat mas) nogen
merge 1:1 pnr year using medication_v6, keep(mat mas) nogen
compress
save fullsample_v7, replace

* Restricts the full sample to ages 20-65 and birth years 1940-1977, adds the
* variables from parents_v6 and saves the result.
set more off
use fullsample_v7, clear

* Observations with missing age or birthyear are dropped as well, because
* missing compares as larger than any number.
drop if age<20 | age>65
drop if birthyear<1940 | birthyear>=1978

merge m:1 pnr using parents_v6, nogen keep(mat mas)
compress
save reducedsample_v7_incl65yo, replace

clear
